# Replication File

## Article: Populist positions in party competition: Do parties strategically vary their degree of populism in reaction to vote and office loss?
## Author: Magdalena Breyer
## Journal: Party Politics
## Date: 12 May 2022

# Packages data wrangling and analysis
library(tidyverse) 
library(readxl)
library(fuzzyjoin)
library(lubridate) 
library(overviewR)
library(kableExtra)
library(estimatr) 
library(ggeffects)
library(lm.beta) 
library(modelsummary)
library(margins)
library(rigr)
library(gridExtra)
library(allespaletti) #party colors

# data on legislator and parliamentary sessions 
library(legislatoR)

# Text analysis
library(quanteda)
library(stringr)
# Johann Gründl (2020)
library(regexhelpeR)
library(multidictR)
library(popdictR)

# Make ggplot2's classic theme the session-wide default for every plot below.
theme_set(theme_classic())

# A. DATA IMPORT AND MERGING-------------

## Parl Speech -------------

# Data: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/L4OAKN
# Read each ParlSpeech V2 corpus and normalise its column types in one step.

# Austria (Nationalrat): parse `date` with lubridate and set the ISO3 code.
corp_aut <- mutate(
  read_rds("data_raw/Corp_Nationalrat_V2.rds"),
  date = as_date(date),
  iso3country = "AUT"
)

# Germany (Bundestag): parse `date` and store the speech number as integer.
corp_deu <- mutate(
  read_rds("data_raw/Corp_Bundestag_V2.rds"),
  date = as_date(date),
  speechnumber = as.integer(speechnumber)
)

## LegislatoR for session dates---------

## Core data AUT
# Core (person-level) table for Austria
aut_politicians <- get_core("aut")

# Keep only politicians with at least one political record in session 20+
aut_politicians_subset <- aut_politicians %>%
  semi_join(
    get_political("aut") %>% filter(session >= 20),
    by = "pageid"
  )

# Attach session / party / constituency records; keep sessions 20 onwards
aut_pol <- aut_politicians_subset %>%
  left_join(get_political("aut"), by = "pageid") %>%
  filter(session >= 20)

# Attach the ParlSpeech speaker id and drop legislators without one
aut_legislator <- aut_pol %>%
  left_join(
    get_ids("aut") %>% select(wikidataid, parlspeech),
    by = "wikidataid"
  ) %>%
  filter(!is.na(parlspeech))

## DEU
# Core (person-level) table for Germany
deu_politicians <- get_core("deu")

# Attach session / party / constituency records; keep sessions 12 onwards
deu_pol <- deu_politicians %>%
  left_join(get_political("deu"), by = "pageid") %>%
  filter(session >= 12)

# The id table contains duplicated wikidataid/parlspeech pairs: keep each once
deu_ids <- get_ids("deu") %>%
  select(wikidataid, parlspeech) %>%
  unique()

# Attach the ParlSpeech speaker id; where it is missing fall back to `name`,
# then drop rows that became exact duplicates
deu_legislator <- deu_pol %>%
  left_join(deu_ids, by = "wikidataid") %>%
  mutate(parlspeech = if_else(is.na(parlspeech), name, parlspeech)) %>%
  unique()

## Create session ID
# One row per distinct (session_start, session_end) pair. The session number
# is the rank of the start date, shifted so that numbering begins at the first
# session kept above (12 for DEU, 20 for AUT).
session_dates_deu <- mutate(
  unique(select(deu_legislator, session_start, session_end)),
  session = min_rank(session_start) + 11
)

session_dates_aut <- mutate(
  unique(select(aut_legislator, session_start, session_end)),
  session = min_rank(session_start) + 19
)

## and join it to corp dataframes
# Assign every speech the session whose interval contains its date
# (session_start <= date < session_end); the interval bounds are dropped
# again once the session number is attached.
corp_deu <- fuzzy_left_join(
  x = corp_deu,
  y = session_dates_deu,
  by = c("date" = "session_start", "date" = "session_end"),
  match_fun = list(`>=`, `<`)
) %>%
  select(-session_start, -session_end)

corp_aut <- fuzzy_left_join(
  x = corp_aut,
  y = session_dates_aut,
  by = c("date" = "session_start", "date" = "session_end"),
  match_fun = list(`>=`, `<`)
) %>%
  select(-session_start, -session_end)

# JOIN corpus (parlspeech) and legislatoR datasets
# Match speeches to legislator records on speaker id and session. Both inputs
# carry a `party` column: the corpus one becomes `party_group`, the legislatoR
# one keeps the name `party`.
corp_deu_leg <- corp_deu %>%
  left_join(
    deu_legislator,
    by = c("speaker" = "parlspeech", "session" = "session")
  ) %>%
  rename(party_group = party.x, party = party.y)

corp_aut_leg <- corp_aut %>%
  left_join(
    aut_legislator,
    by = c("speaker" = "parlspeech", "session" = "session")
  ) %>%
  rename(party_group = party.x, party = party.y)

## ParlGov --------------

# Data: https://www.parlgov.org/2018/05/01/parlgov-2018-released/

# Read the three ParlGov sheets used below from the same workbook
party_all <- read_excel("data_raw/parlgov.xlsx", sheet = "party")
cabinet_all <- read_excel("data_raw/parlgov.xlsx", sheet = "cabinet")
election_all <- read_excel("data_raw/parlgov.xlsx", sheet = "election")

# Each sheet is restricted to Austria and Germany, trimmed to the columns
# needed later, and given a character `party_id` so the join keys share a type.

# Parties: drop the left_right:chess block and the ASCII party name
party <- party_all %>%
  filter(country_name_short %in% c("AUT", "DEU")) %>%
  select(!(left_right:chess), -party_name_ascii) %>%
  mutate(party_id = as.character(party_id))

# Cabinets: only those starting after 1 January 1980
cabinet <- cabinet_all %>%
  filter(country_name_short %in% c("AUT", "DEU"),
         start_date > "1980-01-01") %>%
  select(election_date:election_seats_total, left_right, election_id:party_id) %>%
  mutate(party_id = as.character(party_id))

# Elections: national parliament elections only (all columns kept)
election <- election_all %>%
  filter(country_name_short %in% c("AUT", "DEU"),
         election_type == "parliament") %>%
  mutate(party_id = as.character(party_id))

# merge different parlgov data into one df

# party and cabinet
# if multiple cabinets per election, this election appears twice

# Party x cabinet table. An election followed by several cabinets appears
# once per cabinet.
parlgov <- party %>%
  inner_join(cabinet, by = "party_id")

# Election data: treat CDU (808) and CSU (1180) as one party (1727).
# First recode the id, then relabel every row carrying the joint id.
election <- election %>%
  mutate(party_id = if_else(party_id %in% c("808", "1180"), "1727", party_id)) %>%
  mutate(
    party_name_short = if_else(party_id == "1727", "CDU+CSU", party_name_short),
    party_name_english = if_else(
      party_id == "1727",
      "Christian Democratic Union / Christian Social Union",
      party_name_english
    ),
    party_name = if_else(
      party_id == "1727",
      "Christlich Demokratische Union / Christlich Soziale Union",
      party_name
    )
  )

# Sum vote shares within party and election (this yields the joint CDU+CSU result)
election_grouped <- election %>%
  group_by(party_id, election_id, previous_parliament_election_id) %>%
  summarise(vote_share = sum(vote_share))

# Attach election results to the party x cabinet rows. The left join keeps the
# cabinet-based structure, so parties without a row in `parlgov` are excluded.
# Afterwards:
#  - German Greens: ParlGov uses two ids (772 and 255); use 772 throughout
#  - seat_per: seat share in percent of all seats
parlgov_autdeu <- parlgov %>%
  left_join(election_grouped, by = c("election_id", "party_id")) %>%
  mutate(party_id = if_else(party_id == "255", "772", party_id)) %>%
  mutate(seat_per = (seats / election_seats_total) * 100)


# Lagged vote share and cabinet party

# only cabinets
# Lagged cabinet status: take (cabinet_id, cabinet_party, party_id), relabel the
# first two as previous_cabinet_id / previous_cabinet_party, and right-join this
# lookup back onto parlgov_autdeu by party and previous_cabinet_id. Every row of
# parlgov_autdeu is kept; rows without a match get NA in previous_cabinet_party.
parlgov_autdeu_l <- parlgov_autdeu %>%
  select(cabinet_id, cabinet_party, party_id
  ) %>%
  rename(previous_cabinet_id = cabinet_id,
         previous_cabinet_party = cabinet_party,
  ) %>%
  right_join(parlgov_autdeu,
             by = c("party_id", "previous_cabinet_id"
             )) %>%
  # column order only: party_id after country_id, previous_* after vote_share
  relocate(party_id, .after = country_id) %>%
  relocate(starts_with("previous_"), .after = vote_share)

# Lagged election results: same lookup pattern, this time keyed on
# previous_parliament_election_id, carrying vote_share and seat_per over as
# previous_vote_share and previous_seat_per.
# only elections
parlgov_autdeu_l2 <- parlgov_autdeu %>%
  select(
    election_id, vote_share, party_id, seat_per
  ) %>%
  rename(
    previous_vote_share = vote_share,
    previous_parliament_election_id = election_id,
    previous_seat_per = seat_per
  ) %>%
  right_join(parlgov_autdeu,
             by = c("party_id",
                    "previous_parliament_election_id"
             )) %>%
  distinct() # some rows are duplicated

# fix seats: as FPÖ and BZÖ split after 2002 election, prev seat per not unique for next election: change manually
# (party "50" in election 197 is set to 6 of 183 seats, expressed in percent)
parlgov_autdeu_l2 <- parlgov_autdeu_l2 %>%
  mutate(
    previous_seat_per = if_else(election_id == 197 & party_id == "50",((6/183) * 100), previous_seat_per)
  ) %>%
  distinct()

# Combine both lag tables: keep only the keys and lagged election columns from
# parlgov_autdeu_l2 and right-join them onto the cabinet-lag table, so every row
# of parlgov_autdeu_l is retained.
#combine
parlgov_autdeu_both <- parlgov_autdeu_l2 %>%
  select(previous_parliament_election_id, previous_vote_share, previous_seat_per, party_id, election_id) %>%
  right_join(parlgov_autdeu_l,
             by = c("party_id", "election_id", "previous_parliament_election_id")) %>%
  # column order only
  relocate(party_id, .after = country_id) %>%
  relocate(election_id, .after = cabinet_id) %>%
  relocate(starts_with("previous_"), .after = seat_per) %>%
  distinct()

# Austrian Greens. German FDP: previous results/cabinet position missing, when not in parliament previously
# The values below are entered by hand for specific election/cabinet ids: a
# previous vote share, previous cabinet status 0 and previous seat share 0.
# ifelse() (not if_else()) is used here, so no strict type matching is enforced.
parlgov_autdeu_both <- parlgov_autdeu_both %>%
  mutate(
    #party_id = as.numeric(party_id),
    previous_vote_share = ifelse(election_id == 1080 & party_id == "1429", 3.8, previous_vote_share), # AT Greens 2017
    previous_vote_share = ifelse(election_id == 1016 & party_id == "2253", 4.7, previous_vote_share), # Afd 2013
    previous_vote_share = ifelse(election_id == 1016 & party_id == "543", 4.8, previous_vote_share), # FDP 2013
    previous_cabinet_party = ifelse(cabinet_id == 1608 & party_id == "1429", 0, previous_cabinet_party), # AT Greens 2017
    previous_cabinet_party = ifelse(cabinet_id == 1515 & party_id == "2253", 0, previous_cabinet_party), # Afd 2013
    previous_cabinet_party = ifelse(cabinet_id == 1515 & party_id == "543", 0, previous_cabinet_party), # FDP 2013
    previous_seat_per = ifelse(election_id == 1080 & party_id == "1429", 0, previous_seat_per), # AT Greens 2017
    previous_seat_per = ifelse(election_id == 1016 & party_id == "2253", 0, previous_seat_per), # Afd 2013
    previous_seat_per = ifelse(election_id == 1016 & party_id == "543", 0, previous_seat_per) # FDP 2013
  )

# From here on parlgov_autdeu refers to the table including the lagged variables
parlgov_autdeu <- parlgov_autdeu_both

## PartyFacts--------

#download and read Party Facts mapping table
# Download (only if not yet present) and read the Party Facts mapping table.
file_name <- "data_raw/partyfacts-mapping.csv"
# file.exists() tests the full relative path. The former check against
# list.files() compared the path with bare entry names of the working
# directory, which can never match a path containing "data_raw/", so the file
# was downloaded again on every run.
if (!file.exists(file_name)) {
  url <- "https://partyfacts.herokuapp.com/download/external-parties-csv/"
  download.file(url, file_name)
}
partyfacts_raw <- read_csv(file_name, guess_max = 30000)
# Mapping rows that actually carry a Party Facts id
partyfacts <- partyfacts_raw %>% filter(! is.na(partyfacts_id))


## 1. link ParlSpeech and ParlGov
# NOTE(review): both subsets are drawn from `partyfacts_raw`, not from the
# NA-filtered `partyfacts` above; rows with a missing partyfacts_id would match
# each other in the join below. Confirm that this is intended.
dataset_1 <- partyfacts_raw %>% filter(dataset_key == "parlgov",
                                       country %in% c("AUT", "DEU"))
dataset_2 <- partyfacts_raw %>% filter(dataset_key == "parlspeech",
                                       country %in% c("AUT", "DEU"))

## Grüne DEU have old party facts ID (10) in parlspeech-partyfacts subset, should be 1816
dataset_2 <- dataset_2 %>%
  mutate(partyfacts_id = if_else(partyfacts_id == 10, 1816, partyfacts_id))

# One row per ParlSpeech party with the matching ParlGov entry; columns from
# the ParlSpeech side get suffix .x, those from the ParlGov side suffix .y
link_table <-
  dataset_2 %>%
  left_join(dataset_1, by = c("partyfacts_id" = "partyfacts_id"))

## 2. Merge partyfacts with parl gov
# dataset_party_id.y is the ParlGov party id taken from the link table
partyf_parlgov <- link_table %>%
  left_join(parlgov_autdeu, by = c("dataset_party_id.y" = "party_id")) %>%
  select(partyfacts_id, dataset_party_id.y, year_first.y:year_last.y, country_name,
         party_name_english:previous_cabinet_party)

## 3. Prep to merge df corp (Parlspeech) with parl gov
# cabinet id and date list
# Cabinet spells since 1990: one row per cabinet with its start date; the end
# date is the start of the next cabinet in the same country, or today's date
# (as character) for the most recent cabinet.
c_id <- parlgov_autdeu %>%
  filter(election_date >= "1990-01-01") %>%
  select(country_name, election_date, start_date, cabinet_id) %>%
  group_by(country_name, election_date, start_date, cabinet_id) %>%
  unique() %>%
  arrange(country_name, start_date) %>%
  group_by(country_name) %>%
  mutate(end_date = lead(start_date)) %>%
  mutate(end_date = if_else(is.na(end_date), as.character(Sys.Date()), end_date)) %>%
  ungroup() %>%
  select(-election_date)

# Country-specific spell tables
c_aut <- c_id %>%
  filter(country_name == "Austria") %>%
  select(-country_name)

c_deu <- c_id %>%
  filter(country_name == "Germany") %>%
  select(-country_name)

# Attach the cabinet in office on the day of each speech
# (start_date <= date < end_date), then drop the spell bounds again
corp_deu_c <- fuzzy_left_join(
  x = corp_deu_leg,
  y = c_deu,
  by = c("date" = "start_date", "date" = "end_date"),
  match_fun = list(`>=`, `<`)
) %>%
  select(-start_date, -end_date)

corp_aut_c <- fuzzy_left_join(
  x = corp_aut_leg,
  y = c_aut,
  by = c("date" = "start_date", "date" = "end_date"),
  match_fun = list(`>=`, `<`)
) %>%
  select(-start_date, -end_date)

# Stack both countries into one corpus table
corp <- bind_rows(corp_deu_c, corp_aut_c)

# Party name changes - change coding
## Die Linke/PDS has partyfact IDs: 86 (until 2007), 1545

# Harmonise party coding before the merge:
#  - Die Linke/PDS: Party Facts id 86 (until 2007) is recoded to 1545
#  - Party Facts id 6137 gets the party group label "Jetzt - Liste PILZ"
corp <- corp %>%
  mutate(party.facts.id = if_else(party.facts.id == 86, 1545, party.facts.id)) %>%
  mutate(party_group = if_else(party.facts.id == 6137, "Jetzt - Liste PILZ", party_group))


# 4. Merge df corp (Parlspeech) with parl gov
# Join on Party Facts id and cabinet, drop the incomplete country / year
# columns (the ParlSpeech iso3 code remains), rename the ParlGov party id and
# remove exact duplicate rows. The result is still grouped by the speech keys.
complete <- corp %>%
  left_join(
    partyf_parlgov,
    by = c("party.facts.id" = "partyfacts_id", "cabinet_id" = "cabinet_id")
  ) %>%
  select(-year_first.y, -year_last.y, -country, -country_name) %>%
  rename(parlgov_id = dataset_party_id.y) %>%
  group_by(date, speaker, speechnumber, terms, text) %>%
  distinct()

# B. PREP FOR DICTIONARY ANALYSIS------

## data without party group NA and chairs

# Build the analysis sample of speeches in one pipeline:
#  1. drop chair contributions and speeches without a party group
#  2. drop Liste Pilz
#  3. classify party groups as populist / mainstream (Popu-List)
#  4. drop speeches without ParlGov data (legislators who founded or switched
#     to parties in sessions where these parties did not enter parliament)
speeches <- complete %>%
  ungroup() %>%
  filter(chair == FALSE, !is.na(party_group)) %>%
  filter(party_group != "Jetzt - Liste PILZ") %>%
  mutate(
    party_type = factor(
      if_else(
        party_group %in% c("AfD", "PDS/LINKE", "BZÖ", "FPÖ", "STRONACH"),
        "populist",
        "mainstream"
      )
    )
  ) %>%
  filter(!is.na(cabinet_party))

# The merged table is no longer needed
rm(complete)

# C. DICTIONARY ANALYSIS---------

## assign doc_id
# Give every speech a unique document id ("text1", "text2", ...), used to join
# the dictionary results back to the speech-level variables.
speeches <- speeches %>%
  mutate(
    doc_id = paste0("text", row_number())
  )


## Apply Gründl dictionary ----
### (applied on corpus)
# restrict corpus only to text/doc_id corpus, join results to other docvars later
speech_corp_s <- speeches %>%
  select(doc_id, text) %>%
  corpus(docid_field = "doc_id",
         text_field =  "text"
  )


# PopdictR function
# memory.limit() only has an effect on Windows with R < 4.2.0; elsewhere it is
# a stub that merely raises a warning, so it is only called where it works.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit(50000)
}

# Count dictionary matches at sentence level; totals (sentences, tokens) are
# requested as well and the full object (not only the result) is returned.
gdict <- run_popdict(speech_corp_s,
                     dict_version = "current",
                     at_level = "sentences",
                     return_value = "count_at_level",
                     include_totals = TRUE,
                     return_result_only = FALSE
)

# convert to data frame
# gruendl_sent_score: populist sentences as a percentage of all sentences
df_gdict <- gdict %>%
  convert(to = "data.frame") %>%
  select(doc_id, dict_gruendl_2020, n_sentences, n_tokens) %>%
  rename(gruendl_sentcount = dict_gruendl_2020) %>% #different processing? different token n
  mutate(
    gruendl_sent_score = (gruendl_sentcount/n_sentences) * 100
  )

# join to docvars
df_dict <- left_join(df_gdict, speeches, by = "doc_id")


## Short statements: exclude ------
#(5 sentences or fewer, 85,768 speeches excluded)

df_dict <- filter(df_dict, n_sentences > 5)

# Distribution of speech length after the exclusion. The original referred to
# an undefined object `df_dict2`; the filtered table is `df_dict`.
summary(df_dict$n_sentences)

# D. RECODING VARIABLES, MONTHLY AGGREGATING-------

## Add month variable
df_dict <- df_dict %>%
  # Month identifier (first day of the month, as character), id variables as
  # character, and the span between speech date and election date
  # (espan as period, espan2 in years; ecycle is derived after grouping).
  mutate(
    month = paste0(format(date, "%Y-%m"), "-01"),
    speechnumber = as.character(speechnumber),
    session = as.character(session),
    cabinet_id = as.character(cabinet_id),
    election_id = as.character(election_id),
    espan = as.period(date %--% election_date),
    espan2 = as.duration(espan) / dyears(1)
  ) %>%
  # Factor coding: populist parties and opposition are the reference
  # categories; LIF and NEOS get no party direction (set to NA).
  mutate(
    party_type = relevel(party_type, "populist"),
    party_direction = fct_recode(
      fct_collapse(
        party_group,
        "mainstream left" = c("SPD", "SPÖ", "GRUENE", "Grüne"),
        "mainstream right" = c("ÖVP", "CDU/CSU", "FDP"),
        "populist right" = c("AfD", "BZÖ", "FPÖ", "STRONACH"),
        "populist left" = "PDS/LINKE"
      ),
      NULL = "LIF",
      NULL = "NEOS"
    ),
    cabinet_party = fct_recode(
      factor(cabinet_party),
      "opposition" = "0",
      "government" = "1"
    ),
    previous_cabinet_party = fct_recode(
      factor(previous_cabinet_party),
      "opposition" = "0",
      "government" = "1"
    )
  ) %>%
  # Vote change relative to the previous election, the size of the loss
  # (0 when there was none), and categorical versions with 2 and 4 classes
  # (intervals closed on the right).
  mutate(
    vote_change = vote_share - previous_vote_share,
    electoral_loss = if_else(vote_change < 0, -(vote_change), 0),
    vote_change_2 = relevel(
      factor(if_else(vote_change < 0, "loss", "win")),
      "loss"
    ),
    vote_change_4 = cut(
      vote_change,
      breaks = c(-17, -3, 0, 3, 16),
      include.lowest = TRUE,
      right = TRUE,
      labels = c("large loss", "small loss", "small win", "large win")
    )
  )


# Populist success

# Combined vote share of the populist parties per election: take one vote
# share per party and election, spread parties into columns and add up the
# populist ones row by row (parties absent in an election count as 0).
pop_suc <- df_dict %>%
  group_by(iso3country, election_id, party_group) %>%
  summarise(vote_share = first(vote_share)) %>%
  pivot_wider(names_from = party_group, values_from = vote_share) %>%
  rowwise() %>%
  mutate(
    populist_success = sum(c(AfD, `PDS/LINKE`, FPÖ, BZÖ, STRONACH), na.rm = TRUE)
  ) %>%
  ungroup() %>%
  select(election_id, populist_success)

# Attach to the speeches and add a mean-centred (not rescaled) version
df_dict <- left_join(df_dict, pop_suc, by = "election_id")
df_dict <- mutate(
  df_dict,
  populist_success_c = scale(populist_success, center = TRUE, scale = FALSE)
)

# Decade cut points (1989, 1999, 2009, 2019)
vdec <- seq(1989, 2019, by = 10)

## TABLE 1------

#year variable
# Calendar year of each speech
df_dict <- mutate(df_dict, year = lubridate::year(date))

# Time coverage per party group (overviewR) ...
party_year <- overview_tab(dat = df_dict, id = party_group, time = year)

# ... and number of speeches per country, party group and party type (dplyr)
type <- df_dict %>%
  group_by(iso3country, party_group, party_type) %>%
  count()

# Combine both, keep and label the columns shown in the table, sort the rows
party_year <- party_year %>%
  left_join(type, by = "party_group") %>%
  select(iso3country, party_group, party_type, time_frame, n) %>%
  rename(
    Country = iso3country,
    Party = party_group,
    Classification = party_type,
    "Time frame" = time_frame
  ) %>%
  arrange(Country, Party)

party_year

# Export to latex (uncomment `format` for LaTeX output); rows are packed by country
party_year %>%
  select(-Country) %>%
  kbl(
    #format = "latex",
    booktabs = TRUE,
    format.args = list(big.mark = ",")
  ) %>%
  kable_styling() %>%
  pack_rows(index = table(party_year$Country))

## Group by month----
### Populism percentage is now monthly average of populism percentage in speeches
#### ecycle: calculate after grouping
# Party-month panel: within each group, `n` is the number of speeches and all
# numeric variables are averaged; groups with 2 speeches or fewer are removed.
# Time variables are derived afterwards: date and year of the month, centred
# year, decade (labelled 1990s/2000s/2010s) and the year within the electoral
# cycle (ecycle, from the averaged span to the election).
df_m <- df_dict %>%
  group_by(iso3country, session, party_type, party_direction, party_group, month,
           cabinet_id, cabinet_party, prime_minister, election_id,
           previous_cabinet_party, vote_change_2, vote_change_4) %>%
  add_tally() %>%
  summarize_if(is.numeric, mean, na.rm = TRUE) %>%
  filter(n > 2) %>%
  ungroup() %>%
  mutate(
    time = as.Date(month, "%Y-%m-%d"),
    year = year(month),
    year_center = as.integer(year - mean(year)),
    decade = cut(year, vdec),
    ecycle = cut(-espan2, breaks = 0:4, labels = FALSE)
  ) %>%
  mutate(
    decade = fct_recode(
      decade,
      "1990s" = "(1.99e+03,2e+03]",
      "2000s" = "(2e+03,2.01e+03]",
      "2010s" = "(2.01e+03,2.02e+03]"
    )
  )


# Number of electoral wins/losses: one row per party and election, then
# cross-tabulated by party type and by party
loss_count <- df_m %>%
  group_by(party_group, party_type, election_id, vote_change_4, vote_change_2) %>%
  count()

table(loss_count$party_type, loss_count$vote_change_4)
table(loss_count$party_type, loss_count$vote_change_2)
table(loss_count$party_group, loss_count$vote_change_4)
table(loss_count$party_group, loss_count$vote_change_2)

# E. ANALYSIS OF PARTY POSITIONS-------

## Results descriptive

# Mean and median monthly populism score by party type
df_m %>%
  group_by(party_type) %>%
  summarise(
    mean_pop = mean(gruendl_sent_score),
    median_pop = median(gruendl_sent_score)
  )

# Base plots for the violin figures: score by party type and by country
fg <- ggplot(df_m, aes(party_type, gruendl_sent_score))
gg <- ggplot(df_m, aes(iso3country, gruendl_sent_score))


## FIGURE 1-----
# Violin per party type with the median drawn; area scaled to the number of
# observations
violin <- fg + geom_violin(draw_quantiles = c(0.5), scale = "count", fill = "gray")+
  labs(x=NULL, y="Populist sentences in %")
violin

## FIGURE B1 Appendix -----
# Same as Figure 1, but by country with party type as fill
app1 <- gg + geom_violin(aes(fill = party_type),
                         draw_quantiles = c(0.5), scale = "count")+
  labs(x=NULL, y="Populist sentences in %") +
  scale_fill_brewer(name="Party type", palette = "Greys")
app1

## FIGURE 2------
# Monthly scores over time with a smoothed trend per party type, one panel per
# country.
# The axis breaks start at fixed dates. The former as.Date(x, "%Y") calls
# parsed only the year and silently filled in the month and day of the day the
# script was run, so the breaks changed from run to run.
line <- ggplot(df_m, aes(as.Date(month,"%Y-%m-%d"), gruendl_sent_score)) +
  geom_point(aes(shape = party_type), alpha = 0.15) +
  geom_smooth(aes(linetype = party_type), alpha = 0.5, color = "black", size = 0.8, fill = "gray") +
  guides(shape = guide_legend("Party type"), linetype = guide_legend("Party type")) +
  theme_classic() + labs(x="Month", y="Populist sentences in %") +
  scale_x_date(date_labels = "%Y",
               breaks = seq(as.Date("1990-01-01"), as.Date("2020-01-01"), by = 1500), # every 1500 days
               name = NULL) +
  facet_grid(rows = vars(iso3country)) +
  coord_cartesian(ylim = c(0, 5)) #zoom in, outliers not removed
line

## Significance question-----

ttest(df_m$gruendl_sent_score, by = df_m$party_type) # overall: party types differ

# Country subsets used by the tests below. They have to exist before their
# first use here; the plotting code further down the script creates objects of
# the same name only later (and with additional columns), so running the script
# top to bottom previously failed with "object 'df_m_deu' not found".
df_m_deu <- df_m %>%
  filter(iso3country == "DEU")
df_m_aut <- df_m %>%
  filter(iso3country == "AUT")

# DEU
# Germany without the AfD: the populist group then consists of PDS/LINKE only
df_m_deup <- df_m_deu %>%
  filter(
    party_group != "AfD"
  )

ttest(df_m_deup$gruendl_sent_score, by = df_m_deup$party_type)

# LINKE (populist without AfD) and mainstream parties differ significantly

# Germany, populist parties only: compare AfD and PDS/LINKE with each other
df_m_deup <- df_m_deu %>%
  filter(
    party_type == "populist"
  )

ttest(df_m_deup$gruendl_sent_score, by = df_m_deup$party_group)

# AfD and LINKE differ significantly

# AUT

ttest(df_m_aut$gruendl_sent_score, by = df_m_aut$party_type)

# populist and mainstream parties differ

# For different times in Austria
df_m_autp <- df_m_aut %>%
  filter(
    #year >= 1998 & year < 2006
    decade == "2000s"
  )

ttest(df_m_autp$gruendl_sent_score, by = df_m_autp$party_type)

# ÖVP only, split into the years up to 2017 and after 2017
df_m_autp <- df_m_aut %>%
  filter(
    party_group == "ÖVP"
  ) %>%
  mutate(after17 = cut(year, breaks = c(1995, 2017, 2019), # 2013 is diff, 2017 not
                       labels = c("before 2017", "after 2017")))

ttest(df_m_autp$gruendl_sent_score, by = df_m_autp$after17) # övp before and after Kurz


## FIGURE B2 Appendix-----

# AUT

# Austrian party-months. govvary flags parties observed both in government and
# in opposition; party_group becomes a factor in the order used for the legend
# and for the manual colour vector below.
df_m_aut <- df_m %>%
  filter(iso3country == "AUT") %>%
  group_by(party_group) %>%
  mutate(
    govvary = if_else(any(cabinet_party == "government") & any(cabinet_party == "opposition"), "yes", "no")
  ) %>%
  ungroup() %>%
  mutate(
    party_group = fct_relevel(factor(party_group), c("ÖVP", "SPÖ", "FPÖ", "Grüne", "LIF", "BZÖ", "STRONACH", "NEOS"))
  )

# One colour per Austrian party, in the factor level order set above
colorsaut <- c("black", "red2", "royalblue3", "green3", "darkgoldenrod3", "orange", "yellow", "hotpink")

# Monthly scores and smoothed trend per Austrian party.
# Axis breaks start at fixed dates: as.Date(x, "%Y") parses only the year and
# fills in the current month and day, which made the breaks run-date dependent.
lineaut <- ggplot(df_m_aut, aes(as.Date(month,"%Y-%m-%d"), gruendl_sent_score, color = party_group
)) +
  geom_point(alpha = 0.2) +
  geom_smooth(aes(color = party_group, fill = party_group), alpha = 0.2, size = 1) +
  guides(shape = guide_legend("Party"), linetype = guide_legend("Party")) +
  theme_classic() + labs(x="Month", y="Populist sentences in %", subtitle = "Austria") +
  scale_color_manual(name=NULL, values =  colorsaut) +
  scale_fill_manual(name=NULL, values =  colorsaut) +
  scale_x_date(date_labels = "%Y",
               breaks = seq(as.Date("1990-01-01"), as.Date("2020-01-01"), by = 1500), # every 1500 days
               name = NULL) +
  coord_cartesian(ylim = c(0, 5)) #zoom in, outliers not removed
lineaut

# DEU
# Same preparation for the German party-months
df_m_deu <- df_m %>%
  filter(iso3country == "DEU") %>%
  group_by(party_group) %>%
  mutate(
    govvary = if_else(any(cabinet_party == "government") & any(cabinet_party == "opposition"), "yes", "no")
  ) %>%
  ungroup() %>%
  mutate(party_group = fct_relevel(factor(party_group), c("CDU/CSU", "SPD", "AfD", "FDP", "PDS/LINKE", "GRUENE")))

# Monthly scores and smoothed trend per German party; colours come from the
# "german" palette of the party colour package loaded at the top of the script
linedeu <- ggplot(df_m_deu, aes(as.Date(month,"%Y-%m-%d"), gruendl_sent_score, color = party_group
)) +
  geom_point(alpha = 0.2) +
  geom_smooth(aes(color = party_group, fill = party_group), alpha = 0.2, size = 1) +
  guides(shape = guide_legend("Party"), linetype = guide_legend("Party")) +
  theme_classic() + labs(x="Month", y="Populist sentences in %", subtitle = "Germany") +
  scale_color_politics("german", name="") +
  scale_fill_politics("german", name="") +
  scale_x_date(date_labels = "%Y",
               breaks = seq(as.Date("1990-01-01"), as.Date("2020-01-01"), by = 1500), # every 1500 days
               name = NULL) +
  coord_cartesian(ylim = c(0, 5)) #zoom in, outliers not removed
linedeu

# combine: Austria on top, Germany below
g <- arrangeGrob(lineaut, linedeu, nrow = 2)
grid.arrange(g)

## Regression analyses --------

## Party type
# All models: OLS on the party-month panel with standard errors clustered by
# party (party.facts.id). Country enters as a dummy rather than through
# `fixed_effects`, because no intercept is reported when fixed effects are
# specified.

## Model 1: party type
mod_1 <- lm_robust(
  gruendl_sent_score ~ party_type + populist_success_c + iso3country,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_1)

## Model 2: party type and government/opposition status
mod_2 <- lm_robust(
  gruendl_sent_score ~ party_type + cabinet_party + populist_success_c + iso3country,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_2)

## Model 3: interaction of party type and government/opposition status
mod_3 <- lm_robust(
  gruendl_sent_score ~ party_type * cabinet_party + iso3country + populist_success_c,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_3)

## Model 4: vote change and party type
mod_4 <- lm_robust(
  gruendl_sent_score ~ vote_change + party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)

## Model 5: interaction of vote change and party type
mod_5 <- lm_robust(
  gruendl_sent_score ~ vote_change * party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_5)

## TABLE C1 Appendix ----
# Coefficient labels: only the terms listed here are shown, in this order
cm <- c(
  "(Intercept)" = "(Intercept)",
  "party_typemainstream" = "Mainstream party",
  "cabinet_partygovernment" = "Government member",
  "party_typemainstream:cabinet_partygovernment" = "Gov. x Mainstream",
  "vote_change" = "Vote change",
  "vote_change:party_typemainstream" = "Vote change x Mainstream",
  "populist_success_c" = "Populist vote share"
)

# Regression table for models 1-5 with standard errors in parentheses
# (uncomment `output` for LaTeX)
modelsummary(
  list(mod_1, mod_2, mod_3, mod_4, mod_5),
  coef_map = cm,
  stars = TRUE,
  statistic = "({std.error})",
  #output = "latex",
  notes = list(
    "Based on party-month aggregates of percentage of populist sentences in speeches.",
    "Party-clustered robust standard errors in parentheses.",
    "Country FE are included.",
    "smaller sample because NAs for previous election results removed.",
    "Model 4-5: vote change between preceding election compared to the one before, and"
  )
)

## Office loss----
### FIGURE 3-----

# Predicted values from mod_3 for every cabinet_party x party_type
# combination. as.numeric(x) puts the categories on a continuous axis whose
# breaks 1 and 2 are relabelled "opposition" and "government".
gov <- ggpredict(mod_3, terms = c("cabinet_party", "party_type")) %>%
  ggplot(aes(x = as.numeric(x), y = predicted, colour = group)) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .15,
    position = position_dodge(.4)
  ) +
  scale_x_continuous(
    name = NULL,
    breaks = c(1, 2),
    labels = c("opposition", "government")
  ) +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_colour_manual(
    name = "Party type",
    labels = c("populist", "mainstream"),
    values = c("black", "grey70")
  )
gov

# Significance question (despite overlapping CI)

# Within party types, by gov. participation: diff is sig.
df_m_mainstream <- df_m %>%
  filter(party_type == "mainstream")
ttest(df_m_mainstream$gruendl_sent_score, by = df_m_mainstream$cabinet_party)

df_m_populist <- df_m %>%
  filter(party_type == "populist")
ttest(df_m_populist$gruendl_sent_score, by = df_m_populist$cabinet_party)

# by country: significant for DEU and AUT (mainstream, and populist, which is
# only possible to compare in AUT)
# The two helper data frames are overwritten with country subsets from here
# on: df_m_mainstream = mainstream & DEU, df_m_populist = populist & AUT.
# NOTE(review): only the DEU subset is tested for mainstream parties in this
# script - confirm where the AUT mainstream test mentioned above is run.
df_m_mainstream <- df_m %>%
  filter(party_type == "mainstream", iso3country == "DEU")
ttest(df_m_mainstream$gruendl_sent_score, by = df_m_mainstream$cabinet_party)

df_m_populist <- df_m %>%
  filter(party_type == "populist", iso3country == "AUT")
ttest(df_m_populist$gruendl_sent_score, by = df_m_populist$cabinet_party)

### FIGURE B3 Appendix-----
# Opp. for individual parties

# For parties with variation
# (rows of df_m_aut with govvary == "yes"; no clustering in these models)
mod_5pa <- lm_robust(
  formula = gruendl_sent_score ~ party_group * cabinet_party +
    populist_success_c,
  data = filter(df_m_aut, govvary == "yes")
)

pa <- ggpredict(mod_5pa, terms = c("cabinet_party", "party_group"))

# For only opposition or only gov
# (rows of df_m_aut with govvary == "no")
mod_5pao <- lm_robust(
  formula = gruendl_sent_score ~ party_group * cabinet_party +
    populist_success_c,
  data = filter(df_m_aut, govvary == "no")
)

# filter manually which combinations of gov/opp and party do not exist:
# keep the opposition prediction for Grüne, LIF, STRONACH and NEOS and the
# government prediction for ÖVP
pao <- ggpredict(mod_5pao, terms = c("cabinet_party", "party_group")) %>%
  filter(
    (group %in% c("Grüne", "LIF", "STRONACH", "NEOS") & x == "opposition") |
      (group == "ÖVP" & x == "government")
  )

# Stack both sets of predictions and fix the party order used in the legend
p <- bind_rows(pa, pao) %>%
  mutate(
    group = fct_relevel(
      group,
      "ÖVP", "SPÖ", "FPÖ", "Grüne", "LIF", "BZÖ", "STRONACH", "NEOS"
    )
  )

## colors
# one manual colour per party, in the order of the releveled `group` factor
colorsaut <- c(
  "black", "red2", "royalblue3", "green3",
  "darkgoldenrod3", "orange", "yellow", "hotpink"
)

autp <- ggplot(
  data = p,
  mapping = aes(x = as.numeric(x), y = predicted, colour = group)
) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .15,
    position = position_dodge(.4)
  ) +
  scale_x_continuous(
    name = NULL,
    breaks = c(1, 2),
    labels = c("opposition", "government")
  ) +
  scale_y_continuous(name = "Predicted populism in %", limits = c(0.8, 3.6)) +
  theme(legend.position = "bottom") +
  scale_colour_manual(values = colorsaut, name = "") +
  labs(subtitle = "Austria")
autp

# DEU

# Cross-tabulate govvary against party_group for the German data
table(df_m_deu$govvary, df_m_deu$party_group)

# For parties with variation
# (rows of df_m_deu with govvary == "yes")
mod_5pd <- lm_robust(
  formula = gruendl_sent_score ~ party_group * cabinet_party +
    populist_success_c,
  data = filter(df_m_deu, govvary == "yes")
)

pd <- ggpredict(mod_5pd, terms = c("cabinet_party", "party_group"))

# For only opposition or only gov
# (rows with govvary == "no"; this model has no cabinet_party term)
mod_5pdo <- lm_robust(
  formula = gruendl_sent_score ~ party_group + populist_success_c,
  data = filter(df_m_deu, govvary == "no")
)

# Bring the party-only predictions into the same layout as `pd`: the party
# moves from x into group, and x becomes the single factor level "opposition"
pdo <- ggpredict(mod_5pdo, terms = "party_group") %>%
  mutate(
    group = x,
    x = factor("opposition")
  )

# Stack both sets of predictions and fix the party order used in the legend
p_deu <- bind_rows(pd, pdo) %>%
  mutate(
    group = fct_relevel(
      group,
      "CDU/CSU", "SPD", "AfD", "FDP", "PDS/LINKE", "GRUENE"
    )
  )

deup <- ggplot(
  data = p_deu,
  mapping = aes(x = as.numeric(x), y = predicted, colour = group)
) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .15,
    position = position_dodge(.4)
  ) +
  scale_x_continuous(
    name = NULL,
    breaks = c(1, 2),
    labels = c("opposition", "government")
  ) +
  scale_y_continuous(name = "Predicted populism in %", limits = c(0.8, 3.6)) +
  theme(legend.position = "bottom") +
  scale_color_politics("german", name = "") +
  labs(subtitle = "Germany")
deup

# combine: Austrian and German panels side by side (two columns)
g <- arrangeGrob(grobs = list(autp, deup), ncol = 2)
grid.arrange(g)

## Vote loss----

### FIGURE 4 -----

# Predicted populism over vote_change from the interaction model mod_5, one
# line type per party type with a shaded confidence ribbon.
# No colour aesthetic is mapped in this plot, so the colour scale below has
# nothing to act on.
ggpredict(mod_5, terms = c("vote_change", "party_type")) %>%
  ggplot(aes(x = x, y = predicted, linetype = group)) +
  geom_line() +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high),
    alpha = .1
  ) +
  scale_x_continuous(name = "Vote change in percentage points") +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_colour_brewer(name = "Party", palette = "Set1") +
  scale_linetype_discrete(name = "Party type")

# Robustness Vote change

### FIGURE C3 Appendix------
# including election cycle year

# Three-way interaction of vote_change, party_type and ecycle, plus country
# dummies; standard errors clustered by party.facts.id.
mod_5_cycle <- lm_robust(
  formula = gruendl_sent_score ~ vote_change * party_type * ecycle +
    iso3country,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_5_cycle)

# Predictions over vote_change by party type, one panel per value that
# ggpredict() returns in its `facet` column for ecycle.
ggpredict(mod_5_cycle, terms = c("vote_change", "party_type", "ecycle")) %>%
  ggplot(aes(x = x, y = predicted, linetype = group)) +
  geom_line() +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high),
    alpha = .1
  ) +
  scale_x_continuous(name = "Vote change in percentage points") +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_colour_brewer(name = "Party", palette = "Set1") +
  facet_wrap(~as.factor(facet)) +
  scale_linetype_discrete(name = "Party type") +
  theme(legend.position = "bottom")

### FIGURE C1 Appendix-----
# with categories (2) of loss/win

# Additive model with the two-category vote change variable
mod_5_2cat <- lm_robust(
  formula = gruendl_sent_score ~ vote_change_2 + party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)

summary(mod_5_2cat)

# Same specification with the vote_change_2 x party_type interaction
mod_5b_2cat <- lm_robust(
  formula = gruendl_sent_score ~ vote_change_2 * party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)


ggpredict(mod_5b_2cat, terms = c("vote_change_2", "party_type")) %>%
  ggplot(aes(x = as.numeric(x), y = predicted, colour = group)) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .15,
    position = position_dodge(.4)
  ) +
  scale_x_continuous(
    name = NULL,
    breaks = c(1, 2),
    labels = c("loss", "win")
  ) +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_colour_manual(
    name = "Party type",
    labels = c("populist", "mainstream"),
    values = c("black", "grey70")
  )

# populist significant difference loss and win
# NOTE(review): df_m_populist was last assigned in the office-loss section as
# the populist & iso3country == "AUT" subset, so this test runs on Austrian
# populist parties only - confirm that this is intended.
ttest(df_m_populist$gruendl_sent_score, by = df_m_populist$vote_change_2)

### FIGURE C2 Appendix-----
# 4 categories

# Additive model with the four-category vote change variable
mod_5_4cat <- lm_robust(
  formula = gruendl_sent_score ~ vote_change_4 + party_type + iso3country +
    populist_success_c,
  data = df_m,
  clusters = party.facts.id
)


# Same specification with the vote_change_4 x party_type interaction
mod_5b_4cat <- lm_robust(
  formula = gruendl_sent_score ~ vote_change_4 * party_type + iso3country +
    populist_success_c,
  data = df_m,
  clusters = party.facts.id
)

# Predicted populism per vote change category and party type; positions 1-4
# on the x axis are relabelled from "large loss" to "large win".
votechange_cat <- ggpredict(
  mod_5b_4cat,
  terms = c("vote_change_4", "party_type")
) %>%
  ggplot(aes(x = as.numeric(x), y = predicted, colour = group)) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .15,
    position = position_dodge(.4)
  ) +
  scale_x_continuous(
    name = NULL,
    breaks = c(1, 2, 3, 4),
    labels = c("large loss", "small loss", "small win", "large win")
  ) +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_colour_manual(
    name = "Party type",
    labels = c("populist", "mainstream"),
    values = c("black", "grey70")
  )

votechange_cat

### FIGURE B4 Appendix ----

# individual parties
## AUT
# manual colours for the Austrian parties (used for both colour and fill)
colorsaut <- c(
  "black", "red2", "royalblue3", "green3",
  "darkgoldenrod3", "orange", "yellow", "hotpink"
)

# Scatter of party-level observations with a linear fit per party_group; the
# y axis is zoomed to 0-5 via coord_cartesian(), which keeps outliers in the
# data used for the fits.
vla <- ggplot(
  data = df_m_aut,
  mapping = aes(x = vote_change, y = gruendl_sent_score, colour = party_group)
) +
  geom_point(alpha = 0.2) +
  geom_smooth(
    aes(colour = party_group, fill = party_group),
    method = "lm",
    alpha = 0.4,
    size = 0.8
  ) +
  guides(shape = guide_legend("Party"), linetype = guide_legend("Party")) +
  theme_classic() +
  labs(
    x = "Vote change in percentage points",
    y = "Populist sentences in %",
    subtitle = "Austria"
  ) +
  scale_colour_manual(name = NULL, values = colorsaut) +
  scale_fill_manual(name = NULL, values = colorsaut) +
  guides(fill = "none") +
  coord_cartesian(ylim = c(0, 5)) + # zoom in, outliers not removed
  # arrow plus label pointing at the single BZÖ vote change value
  annotate(
    geom = "curve",
    x = 9, xend = 7.5, y = 3.3, yend = 3,
    curvature = 0.2,
    arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(
    geom = "text",
    x = 9.2, y = 3.3,
    label = "Sole BZÖ \n vote change",
    hjust = "left",
    size = 3
  )
vla

## DEU

# German counterpart of the Austrian scatter plot: observations with a linear
# fit per party_group, y axis zoomed to 0-5 without dropping outliers.
vld <- ggplot(
  data = df_m_deu,
  mapping = aes(x = vote_change, y = gruendl_sent_score, colour = party_group)
) +
  geom_point(alpha = 0.2) +
  geom_smooth(
    aes(colour = party_group, fill = party_group),
    method = "lm",
    alpha = 0.4,
    size = 0.8
  ) +
  guides(shape = guide_legend("Party"), linetype = guide_legend("Party")) +
  theme_classic() +
  labs(
    x = "Vote change in percentage points",
    y = "Populist sentences in %",
    subtitle = "Germany"
  ) +
  scale_color_politics("german", name = NULL) +
  scale_fill_politics(
    "german",
    name = NULL,
    select = c("union", "spd", "fdp", "linke", "gruene")
  ) +
  guides(fill = "none") +
  coord_cartesian(ylim = c(0, 5)) + # zoom in, outliers not removed
  # arrow plus label pointing at the single AfD vote change value
  annotate(
    geom = "curve",
    x = 6, xend = 7.5, y = 4, yend = 3.8,
    curvature = 0.2,
    arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(
    geom = "text",
    x = 7, y = 4.2,
    label = "Sole AfD vote change",
    hjust = "right",
    size = 3
  )
vld

# combine: Austrian scatter plot above the German one (two rows)
g <- arrangeGrob(grobs = list(vla, vld), nrow = 2)
grid.arrange(g)

### TABLE C2 Appendix------

# Coefficient names (left) and the labels used for them in the table (right).
# Every element must be a `"name" = "label"` pair: the `vote_change:ecycle`
# entry previously had both halves inside a single string, which produced an
# unnamed element and broke the mapping for that coefficient.
# NOTE(review): `vote_change` and `vote_change:party_typemainstream` (both
# part of Model 10) have no entry in this map - confirm that leaving them out
# of the table is intended.
cm <- c(
  "(Intercept)" = "(Intercept)",
  "party_typemainstream" = "Mainstream party",
  "vote_change_2win" = "Vote win (ref. loss)",
  "vote_change_2win:party_typemainstream" = "Vote win (ref. loss) x Mainstream",
  "vote_change_4small loss" = "Small vote loss (ref. large loss)",
  "vote_change_4small win" = "Small vote win (ref. large loss)",
  "vote_change_4large win" = "Large vote win (ref. large loss)",
  "vote_change_4small loss:party_typemainstream" = "Small loss x Mainstream",
  "vote_change_4small win:party_typemainstream" = "Small win x Mainstream",
  "vote_change_4large win:party_typemainstream" = "Large win x Mainstream",
  "ecycle" = "Year in electoral cycle",
  "vote_change:ecycle" = "Year in cycle x Vote change",
  "party_typemainstream:ecycle" = "Year in cycle x Mainstream",
  "vote_change:party_typemainstream:ecycle" = "Year cycl. x Mainstr. x Vote change"
)

# Robustness models for vote change (two categories, four categories,
# electoral cycle), standard errors in parentheses.
modelsummary(
  list(
    "Model 6" = mod_5_2cat,
    "Model 7" = mod_5b_2cat,
    "Model 8" = mod_5_4cat,
    "Model 9" = mod_5b_4cat,
    "Model 10" = mod_5_cycle
  ),
  coef_map = cm,
  stars = TRUE,
  statistic = "({std.error})",
  # output = "latex",
  notes = list(
    "Based on party-month aggregates of percentage of populist sentences in speeches.",
    "Party-clustered robust standard errors in parentheses.",
    "Country FE are included.",
    "Vote change between preceding election compared to the one before."
  )
)

## Appendix C3 Populist success---------

# FIGURE C4
# Additive model: populist vote share and party type, plus country dummies
mod_10a <- lm_robust(
  formula = gruendl_sent_score ~ populist_success_c + party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)

summary(mod_10a)

# Same specification with the populist_success_c x party_type interaction
mod_10 <- lm_robust(
  formula = gruendl_sent_score ~ populist_success_c * party_type + iso3country,
  data = df_m,
  clusters = party.facts.id
)

summary(mod_10)

# Predicted populism over populist_success_c, one line type per party type,
# with a shaded confidence ribbon
popsuc <- ggpredict(mod_10, terms = c("populist_success_c", "party_type")) %>%
  ggplot(aes(x = x, y = predicted, linetype = group)) +
  geom_line() +
  geom_ribbon(
    aes(ymin = conf.low, ymax = conf.high),
    alpha = .1
  ) +
  scale_x_continuous(name = "Populist vote share (mean-centered)") +
  scale_y_continuous(name = "Predicted populism in %") +
  scale_linetype_discrete(name = "Party type")

popsuc


# TABLE C3

# Coefficient names (left) and the labels used for them in the table (right)
cm <- c(
  "(Intercept)" = "(Intercept)",
  "populist_success_c" = "Populist vote share",
  "party_typemainstream" = "Mainstream party",
  "populist_success_c:party_typemainstream" = "Populist success x Mainstream"
)

# Interaction model mod_10 only; `TRUE` is spelled out because the shorthand
# `T` is an ordinary variable that can be reassigned.
modelsummary(
  list("Model 10" = mod_10),
  coef_map = cm,
  stars = TRUE,
  statistic = "({std.error})",
  # output = "latex",
  notes = list(
    "Based on party-month aggregates of percentage of populist sentences in speeches.",
    "Party-clustered robust standard errors in parentheses.",
    "Country FE are included."
  )
)

## Appendix C4 Switch gov. opp.------

# FIGURE C5

# Three-way interaction of party type, current and previous cabinet status,
# plus country dummies and populist_success_c; clustered by party.facts.id.
mod_11 <- lm_robust(
  formula = gruendl_sent_score ~
    party_type * cabinet_party * previous_cabinet_party + iso3country +
    populist_success_c,
  data = df_m,
  clusters = party.facts.id
)
summary(mod_11)

# Predictions by current status (x axis) and previous status (colour), with
# one panel per value of the `facet` column returned for party_type.
govswitch <- ggpredict(
  mod_11,
  terms = c("cabinet_party", "previous_cabinet_party", "party_type")
) %>%
  ggplot(aes(x = as.numeric(x), y = predicted, colour = group)) +
  geom_point(size = 2, position = position_dodge(.4)) +
  geom_line(linetype = "dashed", position = position_dodge(.4)) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = .2,
    position = position_dodge(.4)
  ) +
  facet_wrap(~facet) +
  scale_x_continuous(
    name = "Current gov. status",
    breaks = c(1, 2),
    labels = c("opp.", "gov.")
  ) +
  scale_y_continuous(name = "Predicted populism in %", limits = c(0.6, 2.2)) +
  scale_colour_manual(
    name = "Previous gov. status",
    values = c("black", "grey70")
  )
govswitch

# TABLE C4
# Coefficient names (left) and the labels used for them in the table (right)
cm <- c(
  "(Intercept)" = "(Intercept)",
  "party_typemainstream" = "Mainstream party",
  "cabinet_partygovernment" = "Government member",
  "party_typemainstream:cabinet_partygovernment" = "Gov. x Mainstream",
  "previous_cabinet_partygovernment" = "Previously in gov.",
  "party_typemainstream:previous_cabinet_partygovernment" = "Previous gov. x Mainstream",
  "cabinet_partygovernment:previous_cabinet_partygovernment" = "Gov. x previous gov.",
  "party_typemainstream:cabinet_partygovernment:previous_cabinet_partygovernment" = "Gov. x prev. gov. x Mainstream"
)

# Model 11 only; `TRUE` is spelled out because the shorthand `T` is an
# ordinary variable that can be reassigned.
modelsummary(
  list("Model 11" = mod_11),
  coef_map = cm,
  stars = TRUE,
  statistic = "({std.error})",
  # output = "latex",
  notes = list(
    "Based on party-month aggregates of percentage of populist sentences in speeches.",
    "Party-clustered robust standard errors in parentheses.",
    "Country FE are included.",
    "Smaller sample because NAs for previous gov. status removed."
  )
)

# F. OTHER APPENDIX CONTENT-----

## TABLE A1 Dictionary----

# Dictionary terms as shipped with popdictR
gruendl <- popdictR::gruendl_terms

# Collapse all terms into one comma-separated string and print it as a table.
# Logical arguments are spelled out (TRUE/FALSE) because `T` and `F` are
# ordinary variables that can be reassigned.
gruendl %>%
  paste(collapse = ", ") %>%
  kbl(
    # format = "latex",
    caption = "Populism dictionary by Gründl (2020)",
    escape = TRUE,
    booktabs = TRUE,
    longtable = FALSE
  ) %>%
  kable_styling()


## TABLE A 2 Frequent terms -----

# Wrap the term list in a quanteda dictionary with the single key "gruendl"
gruendl_dict <- dictionary(list(gruendl = gruendl))

# Match statistics per dictionary pattern, computed at sentence level
patterns_gruendl_sent <- get_pattern_stats(
  speech_corp_s,
  patterns = gruendl_dict,
  at_level = "sentences"
)

row.names(patterns_gruendl_sent) <- NULL

# Keep pattern and match count, most frequent pattern first
patterns_gruendl_sent <- patterns_gruendl_sent %>%
  select(original_pattern, match_count) %>%
  arrange(desc(match_count))

# Table of the 20 most frequently matched patterns
patterns_gruendl_sent %>%
  head(n = 20) %>%
  kbl(
    # format = "latex",
    booktabs = FALSE,
    format.args = list(big.mark = ",")
  ) %>%
  kable_styling()

## TABLE A3 Validation----

# Random selection of speeches
set.seed(1111)

# Draw 5 speeches per combination of party type and zero / above-zero
# dictionary score. The grouping is only needed for the stratified draw, so it
# is dropped again afterwards to keep `sample` an ungrouped data frame.
# (`sample` shares its name with base::sample(); the name is kept because the
# merge with the manual coding further down refers to it.)
sample <- df_dict2 %>%
  mutate(
    score_not_zero = if_else(gruendl_sent_score > 0, "above_zero", "zero")
  ) %>%
  group_by(party_type, score_not_zero) %>%
  select(
    speaker, date, party_group, party_type, iso3country, score_not_zero,
    doc_id, gruendl_sent_score, gruendl_sentcount, n_sentences
  ) %>%
  slice_sample(n = 5) %>%
  ungroup()

sampled_ids <- sample$doc_id

# for speech text and speaker export: score of dictionary unknown
# (only metadata and text are selected, no dictionary score columns)
df_sample <- speeches %>%
  select(
    doc_id, date, speaker, party_group, party_type, iso3country, session, text
  ) %>%
  filter(doc_id %in% sampled_ids)

# results manual content analysis
# Read the manual coding and derive summed counts: all four categories
# (populism), the two *_soft categories, the two strong categories, and a
# binary label that is "not populist" when the overall count is zero.
manual <- read_excel("data/manual/results_manual.xlsx") %>%
  mutate(
    populism = antielit + pplecent + antielit_soft + pplecent_soft,
    soft_populism = antielit_soft + pplecent_soft,
    strong_populism = antielit + pplecent,
    any_populism = if_else(populism == 0, "not populist", "populist")
  )

# merge with gruendl_score
# and express the manual counts as a percentage of n_sentences
manual_comb <- manual %>%
  left_join(sample, by = "doc_id") %>%
  mutate(
    populism_sent_score = (populism / n_sentences) * 100,
    soft_populism_sent_score = (soft_populism / n_sentences) * 100,
    strong_populism_sent_score = (strong_populism / n_sentences) * 100
  )

# Table of dictionary vs. manual results per sampled speech; columns are
# selected and given their display names in one step, then sorted by country
# and date.
manual_comb %>%
  select(
    Speaker = speaker,
    Party = party_group,
    Country = iso3country,
    Date = date,
    `Dictionary score` = gruendl_sent_score,
    `Dictionary matches (n)` = gruendl_sentcount,
    `Manual score` = populism_sent_score,
    `Manual matches (n)` = populism
  ) %>%
  arrange(Country, Date) %>%
  kbl(
    # format = "latex",
    booktabs = T,
    digits = 2
  ) %>%
  kable_styling()

# correlation
# between the dictionary match count and the manual populism count
cor.test(manual_comb$gruendl_sentcount, manual_comb$populism)

# TABLE A4
# cross-tabulation of the manual binary label against zero / above-zero
# dictionary score
table(manual_comb$any_populism, manual_comb$score_not_zero)

## Descriptive statistics-----

### TABLE B1 Appendix------

# Pick the analysis variables from df_m, then give them display names
df_mdescr <- df_m %>%
  select(
    gruendl_sent_score, # (% of sentences containing a populist term, party-month average)
    vote_change,
    party_type,
    iso3country,
    cabinet_party,
    previous_cabinet_party,
    vote_change_4
  ) %>%
  rename(
    `Populism degree` = gruendl_sent_score,
    `Vote change (continuous)` = vote_change,
    `Party type` = party_type,
    `Country` = iso3country,
    `Government participation` = cabinet_party,
    `Previous gov. participation` = previous_cabinet_party,
    `Vote change categories` = vote_change_4
  )

# N and percentage for each categorical variable, plus an overall row (`1`)
datasummary(
  `Party type` + `Country` + `Government participation` +
    `Previous gov. participation` + `Vote change categories` +
    1 ~ N + Percent(),
  data = df_mdescr
  # output = 'latex'
)

### TABLE B2 Appendix----

# Summary table of df_mdescr. Unlike the other tables in this script, the
# LaTeX output is switched on here rather than commented out.
datasummary_skim(df_mdescr, output = "latex")

